

<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" />
  <meta name="generator" content="Docutils 0.19: https://docutils.sourceforge.io/" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  
  <title>Monitoring Services &mdash; Ceph Documentation</title>
  

  
  <link rel="stylesheet" href="../../../_static/pygments.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/ceph.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/graphviz.css" type="text/css" />
  <link rel="stylesheet" href="../../../_static/css/custom.css" type="text/css" />

  
  

  
  

  

  
  <!--[if lt IE 9]>
    <script src="../../../_static/js/html5shiv.min.js"></script>
  <![endif]-->
  
    
        <script src="../../../_static/jquery.js"></script>
        <script src="../../../_static/_sphinx_javascript_frameworks_compat.js"></script>
        <script data-url_root="../../../" id="documentation_options" src="../../../_static/documentation_options.js"></script>
        <script src="../../../_static/doctools.js"></script>
        <script src="../../../_static/sphinx_highlight.js"></script>
    
    <script type="text/javascript" src="../../../_static/js/theme.js"></script>

    
    <link rel="index" title="Index" href="../../../genindex/" />
    <link rel="search" title="Search" href="../../../search/" />
    <link rel="next" title="SNMP Gateway Service" href="../snmp-gateway/" />
    <link rel="prev" title="Custom Container Service" href="../custom-container/" /> 
</head>

<body class="wy-body-for-nav">

   
  <header class="top-bar">
    <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">
      <li><a href="../../../" class="icon icon-home" aria-label="Home"></a></li>
          <li class="breadcrumb-item"><a href="../../">Cephadm</a></li>
          <li class="breadcrumb-item"><a href="../">Service Management</a></li>
      <li class="breadcrumb-item active">Monitoring Services</li>
      <li class="wy-breadcrumbs-aside">
            <a href="../../../_sources/cephadm/services/monitoring.rst.txt" rel="nofollow"> View page source</a>
      </li>
  </ul>
  <hr/>
</div>
  </header>
  <div class="wy-grid-for-nav">
    
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search"  style="background: #eee" >
          

          
            <a href="../../../" class="icon icon-home"> Ceph
          

          
          </a>

          

          
<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../../../search/" method="get">
    <input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>

          
        </div>

        
        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">
          
            
            
              
            
            
              <ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../../../start/">Ceph 简介</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../install/">安装 Ceph</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../../">Cephadm</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="../../compatibility/">Compatibility and Stability</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../install/">部署个全新的 Ceph 集群</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../adoption/">现有集群切换到 cephadm</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../host-management/">Host Management</a></li>
<li class="toctree-l2 current"><a class="reference internal" href="../">Service Management</a><ul class="current">
<li class="toctree-l3"><a class="reference internal" href="../mon/">MON Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../mgr/">MGR Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../osd/">OSD Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../rgw/">RGW Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../mds/">MDS Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../nfs/">NFS Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../iscsi/">iSCSI Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../custom-container/">Custom Container Service</a></li>
<li class="toctree-l3 current"><a class="current reference internal" href="#">Monitoring Services</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#deploying-monitoring-with-cephadm">Deploying monitoring with cephadm</a></li>
<li class="toctree-l4"><a class="reference internal" href="#enabling-security-for-the-monitoring-stack">Enabling security for the monitoring stack</a></li>
<li class="toctree-l4"><a class="reference internal" href="#deploying-monitoring-without-cephadm">Deploying monitoring without cephadm</a></li>
<li class="toctree-l4"><a class="reference internal" href="#disabling-monitoring">Disabling monitoring</a></li>
<li class="toctree-l4"><a class="reference internal" href="#setting-up-rbd-image-monitoring">Setting up RBD-Image monitoring</a></li>
<li class="toctree-l4"><a class="reference internal" href="#setting-up-prometheus">Setting up Prometheus</a></li>
<li class="toctree-l4"><a class="reference internal" href="#setting-up-grafana">Setting up Grafana</a></li>
<li class="toctree-l4"><a class="reference internal" href="#setting-up-alertmanager">Setting up Alertmanager</a></li>
<li class="toctree-l4"><a class="reference internal" href="#further-reading">Further Reading</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="../snmp-gateway/">SNMP Gateway Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../tracing/">如何追踪各服务</a></li>
<li class="toctree-l3"><a class="reference internal" href="../smb/">SMB Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../mgmt-gateway/">Management Gateway</a></li>
<li class="toctree-l3"><a class="reference internal" href="../oauth2-proxy/">OAuth2 Proxy</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#service-status">Service Status</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#daemon-status">Daemon Status</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#service-specification">Service Specification</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#daemon-placement">Daemon Placement</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#extra-container-arguments">Extra Container Arguments</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#extra-entrypoint-arguments">Extra Entrypoint Arguments</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#custom-config-files">Custom Config Files</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#removing-a-service">Removing a Service</a></li>
<li class="toctree-l3"><a class="reference internal" href="../#disabling-automatic-deployment-of-daemons">Disabling automatic deployment of daemons</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="../../certmgr/">Certificate Management</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../upgrade/">升级 Ceph</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../operations/">Cephadm operations</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../client-setup/">Client Setup</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../troubleshooting/">Troubleshooting</a></li>
<li class="toctree-l2"><a class="reference internal" href="../../../dev/cephadm/">Cephadm Feature Planning</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../../../rados/">Ceph 存储集群</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../cephfs/">Ceph 文件系统</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../rbd/">Ceph 块设备</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../radosgw/">Ceph 对象网关</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../mgr/">Ceph 管理器守护进程</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../mgr/dashboard/">Ceph 仪表盘</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../monitoring/">监控概览</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../api/">API 文档</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../architecture/">体系结构</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dev/developer_guide/">开发者指南</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../dev/internals/">Ceph 内幕</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../governance/">项目管理</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../foundation/">Ceph 基金会</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../ceph-volume/">ceph-volume</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../releases/general/">Ceph 版本（总目录）</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../releases/">Ceph 版本（索引）</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../security/">Security</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../hardware-monitoring/">硬件监控</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../glossary/">Ceph 术语</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../jaegertracing/">Tracing</a></li>
<li class="toctree-l1"><a class="reference internal" href="../../../translation_cn/">中文版翻译资源</a></li>
</ul>

            
          
        </div>
        
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">

      
      <nav class="wy-nav-top" aria-label="top navigation">
        
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../../../">Ceph</a>
        
      </nav>


      <div class="wy-nav-content">
        
        <div class="rst-content">
        
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
            
<div id="dev-warning" class="admonition note">
  <p class="first admonition-title">Notice</p>
  <p class="last">This document is for a development version of Ceph.</p>
</div>
  <div id="docubetter" style="padding: 5px; font-weight: bold; text-align: right;">
    <a href="https://pad.ceph.com/p/Report_Documentation_Bugs">Report a Documentation Bug</a>
  </div>

  
  <section id="monitoring-services">
<span id="mgr-cephadm-monitoring"></span><h1>Monitoring Services<a class="headerlink" href="#monitoring-services" title="Permalink to this heading"></a></h1>
<p>Ceph Dashboard uses <a class="reference external" href="https://prometheus.io/">Prometheus</a>, <a class="reference external" href="https://grafana.com/">Grafana</a>, and related tools to store and visualize detailed
metrics on cluster utilization and performance.  Ceph users have three options:</p>
<ol class="arabic simple">
<li><p>Have cephadm deploy and configure these services.  This is the default
when bootstrapping a new cluster unless the <code class="docutils literal notranslate"><span class="pre">--skip-monitoring-stack</span></code>
option is used.</p></li>
<li><p>Deploy and configure these services manually.  This is recommended for users
with existing prometheus services in their environment (and in cases where
Ceph is running in Kubernetes with Rook).</p></li>
<li><p>Skip the monitoring stack completely.  Some Ceph dashboard graphs will
not be available.</p></li>
</ol>
<p>The monitoring stack consists of <a class="reference external" href="https://prometheus.io/">Prometheus</a>,
Prometheus exporters (<a class="reference internal" href="../../../mgr/prometheus/#mgr-prometheus"><span class="std std-ref">Prometheus 模块</span></a>, <a class="reference external" href="https://prometheus.io/docs/guides/node-exporter/">Node exporter</a>), <a class="reference external" href="https://prometheus.io/docs/alerting/alertmanager/">Prometheus Alert
Manager</a> and <a class="reference external" href="https://grafana.com/">Grafana</a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Prometheus’ security model presumes that untrusted users have access to the
Prometheus HTTP endpoint and logs. Untrusted users have access to all the
(meta)data Prometheus collects that is contained in the database, plus a
variety of operational and debugging information.</p>
<p>However, Prometheus’ HTTP API is limited to read-only operations.
Configurations can <em>not</em> be changed using the API and secrets are not
exposed. Moreover, Prometheus has some built-in measures to mitigate the
impact of denial of service attacks.</p>
<p>Please see <a class="reference external" href="https://prometheus.io/docs/operating/security/">Prometheus’ Security model</a> for more detailed
information.</p>
</div>
<section id="deploying-monitoring-with-cephadm">
<h2>Deploying monitoring with cephadm<a class="headerlink" href="#deploying-monitoring-with-cephadm" title="Permalink to this heading"></a></h2>
<p>The default behavior of <code class="docutils literal notranslate"><span class="pre">cephadm</span></code> is to deploy a basic monitoring stack.  It
is however possible that you have a Ceph cluster without a monitoring stack,
and you would like to add a monitoring stack to it. (Here are some ways that
you might have come to have a Ceph cluster without a monitoring stack: You
might have passed the <code class="docutils literal notranslate"><span class="pre">--skip-monitoring-stack</span></code> option to <code class="docutils literal notranslate"><span class="pre">cephadm</span></code> during
the installation of the cluster, or you might have converted an existing
cluster (which had no monitoring stack) to cephadm management.)</p>
<p>To set up monitoring on a Ceph cluster that has no monitoring, follow the
steps below:</p>
<ol class="arabic">
<li><p>Deploy a node-exporter service on every node of the cluster.  The node-exporter provides host-level metrics like CPU and memory utilization:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><style type="text/css">
span.prompt1:before {
  content: "# ";
}
</style><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>node-exporter</span>
</pre></div></div></li>
<li><p>Deploy alertmanager:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>alertmanager</span>
</pre></div></div></li>
<li><p>Deploy Prometheus. A single Prometheus instance is sufficient, but
for high availability (HA) you might want to deploy two:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>prometheus</span>
</pre></div></div><p>or</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>prometheus<span class="w"> </span>--placement<span class="w"> </span><span class="s1">&#39;count:2&#39;</span></span>
</pre></div></div></li>
<li><p>Deploy grafana:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>grafana</span>
</pre></div></div></li>
</ol>
</section>
<section id="enabling-security-for-the-monitoring-stack">
<h2>Enabling security for the monitoring stack<a class="headerlink" href="#enabling-security-for-the-monitoring-stack" title="Permalink to this heading"></a></h2>
<p>By default, in a cephadm-managed cluster, the monitoring components are set up and configured without enabling security measures.
While this suffices for certain deployments, others with strict security needs may find it necessary to protect the
monitoring stack against unauthorized access. In such cases, cephadm relies on a specific configuration parameter,
<code class="docutils literal notranslate"><span class="pre">mgr/cephadm/secure_monitoring_stack</span></code>, which toggles the security settings for all monitoring components. To activate security
measures, set this option to <code class="docutils literal notranslate"><span class="pre">true</span></code> with a command of the following form:</p>
<blockquote>
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr<span class="w"> </span>mgr/cephadm/secure_monitoring_stack<span class="w"> </span><span class="nb">true</span></span>
</pre></div></div></div></blockquote>
<p>This change will trigger a sequence of reconfigurations across all monitoring daemons, typically requiring
a few minutes until all components are fully operational. The updated secure configuration includes the following modifications:</p>
<ol class="arabic simple">
<li><p>Prometheus: basic authentication is required to access the web portal and TLS is enabled for secure communication.</p></li>
<li><p>Alertmanager: basic authentication is required to access the web portal and TLS is enabled for secure communication.</p></li>
<li><p>Node Exporter: TLS is enabled for secure communication.</p></li>
<li><p>Grafana: TLS is enabled and authentication is required to access the datasource information.</p></li>
</ol>
<p>In this secure setup, users will need to set up authentication
(username/password) for both Prometheus and Alertmanager. By default the
username and password are set to <code class="docutils literal notranslate"><span class="pre">admin</span></code>/<code class="docutils literal notranslate"><span class="pre">admin</span></code>. The user can change these
values with the commands <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">orch</span> <span class="pre">prometheus</span> <span class="pre">set-credentials</span></code> and <code class="docutils literal notranslate"><span class="pre">ceph</span>
<span class="pre">orch</span> <span class="pre">alertmanager</span> <span class="pre">set-credentials</span></code> respectively. These commands offer the
flexibility to input the username/password either as parameters or via a JSON
file, which enhances security. Additionally, Cephadm provides the commands
<code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">orch</span> <span class="pre">prometheus</span> <span class="pre">get-credentials</span></code> and <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">orch</span> <span class="pre">alertmanager</span> <span class="pre">get-credentials</span></code> to
retrieve the current credentials.</p>
<section id="centralized-logging-in-ceph">
<span id="cephadm-monitoring-centralized-logs"></span><h3>Centralized Logging in Ceph<a class="headerlink" href="#centralized-logging-in-ceph" title="Permalink to this heading"></a></h3>
<p>Ceph now provides centralized logging with Loki &amp; Promtail. Centralized Log Management (CLM) consolidates all log data and pushes it to a central repository,
with an accessible and easy-to-use interface. Centralized logging is designed to make your life easier.
Some of the advantages are:</p>
<ol class="arabic simple">
<li><p><strong>Linear event timeline</strong>: it is easier to troubleshoot issues by analyzing a single chain of events than thousands of different logs from a hundred nodes.</p></li>
<li><p><strong>Real-time live log monitoring</strong>: it is impractical to follow logs from thousands of different sources.</p></li>
<li><p><strong>Flexible retention policies</strong>: with per-daemon logs, log rotation is usually set to a short interval (1-2 weeks) to save disk usage.</p></li>
<li><p><strong>Increased security &amp; backup</strong>: logs can contain sensitive information and expose usage patterns. Additionally, centralized logging allows for HA, etc.</p></li>
</ol>
<p>Centralized Logging in Ceph is implemented using two new services - <code class="docutils literal notranslate"><span class="pre">loki</span></code> &amp; <code class="docutils literal notranslate"><span class="pre">promtail</span></code>.</p>
<p>Loki: It is basically a log aggregation system and is used to query logs. It can be configured as a datasource in Grafana.</p>
<p>Promtail: It acts as an agent that gathers logs from the system and makes them available to Loki.</p>
<p>These two services are not deployed by default in a Ceph cluster. To enable the centralized logging you can follow the steps mentioned here <a class="reference internal" href="../../../mgr/dashboard/#centralized-logging"><span class="std std-ref">Enable Centralized Logging in Dashboard</span></a>.</p>
</section>
<section id="networks-and-ports">
<span id="cephadm-monitoring-networks-ports"></span><h3>Networks and Ports<a class="headerlink" href="#networks-and-ports" title="Permalink to this heading"></a></h3>
<p>All monitoring services can have the network and port they bind to configured with a YAML service specification. By default
cephadm will use the <code class="docutils literal notranslate"><span class="pre">https</span></code> protocol when configuring Grafana daemons unless the user explicitly sets the protocol to <code class="docutils literal notranslate"><span class="pre">http</span></code>.</p>
<p>Example spec file:</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">grafana</span>
<span class="nt">service_name</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">grafana</span>
<span class="nt">placement</span><span class="p">:</span>
<span class="w">  </span><span class="nt">count</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">1</span>
<span class="nt">networks</span><span class="p">:</span>
<span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">192.169.142.0/24</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">4200</span>
<span class="w">  </span><span class="nt">protocol</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http</span>
</pre></div>
</div>
</section>
<section id="default-images">
<span id="cephadm-default-images"></span><span id="cephadm-monitoring-images"></span><h3>Default images<a class="headerlink" href="#default-images" title="Permalink to this heading"></a></h3>
<p><em>The information in this section was developed by Eugen Block in a thread on
the [ceph-users] mailing list in April of 2024. The thread can be viewed here:
<a class="reference external" href="https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/QGC66QIFBKRTPZAQMQEYFXOGZJ7RLWBN/">https://lists.ceph.io/hyperkitty/list/ceph-users&#64;ceph.io/thread/QGC66QIFBKRTPZAQMQEYFXOGZJ7RLWBN/</a>.</em></p>
<p><code class="docutils literal notranslate"><span class="pre">cephadm</span></code> stores a local copy of the <code class="docutils literal notranslate"><span class="pre">cephadm</span></code> binary in
<code class="docutils literal notranslate"><span class="pre">/var/lib/ceph/{FSID}/cephadm.{DIGEST}</span></code>, where <code class="docutils literal notranslate"><span class="pre">{DIGEST}</span></code> is an alphanumeric
string representing the currently-running version of Ceph.</p>
<p>To see the default container images, run the following command:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">cephadm<span class="w"> </span>list-images</span>
</pre></div></div><p>Default monitoring images are specified in
<code class="docutils literal notranslate"><span class="pre">/src/python-common/ceph/cephadm/images.py</span></code>.</p>
<dl class="py class">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">ceph.cephadm.images.</span></span><span class="sig-name descname"><span class="pre">DefaultImages</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">value</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">names=&lt;not</span> <span class="pre">given&gt;</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">*values</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">module=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">qualname=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">type=None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">start=1</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">boundary=None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#ceph.cephadm.images.DefaultImages" title="Permalink to this definition"></a></dt>
<dd><dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.ALERTMANAGER">
<span class="sig-name descname"><span class="pre">ALERTMANAGER</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/prometheus/alertmanager:v0.27.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.ALERTMANAGER" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.ELASTICSEARCH">
<span class="sig-name descname"><span class="pre">ELASTICSEARCH</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/omrizeneva/elasticsearch:6.8.23</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.ELASTICSEARCH" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.GRAFANA">
<span class="sig-name descname"><span class="pre">GRAFANA</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/ceph/grafana:10.4.8</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.GRAFANA" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.HAPROXY">
<span class="sig-name descname"><span class="pre">HAPROXY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/ceph/haproxy:2.3</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.HAPROXY" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.JAEGER_AGENT">
<span class="sig-name descname"><span class="pre">JAEGER_AGENT</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/jaegertracing/jaeger-agent:1.29</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.JAEGER_AGENT" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.JAEGER_COLLECTOR">
<span class="sig-name descname"><span class="pre">JAEGER_COLLECTOR</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/jaegertracing/jaeger-collector:1.29</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.JAEGER_COLLECTOR" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.JAEGER_QUERY">
<span class="sig-name descname"><span class="pre">JAEGER_QUERY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/jaegertracing/jaeger-query:1.29</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.JAEGER_QUERY" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.KEEPALIVED">
<span class="sig-name descname"><span class="pre">KEEPALIVED</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/ceph/keepalived:2.2.4</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.KEEPALIVED" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.LOKI">
<span class="sig-name descname"><span class="pre">LOKI</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">docker.io/grafana/loki:3.0.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.LOKI" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.NGINX">
<span class="sig-name descname"><span class="pre">NGINX</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/ceph/nginx:sclorg-nginx-126</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.NGINX" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.NODE_EXPORTER">
<span class="sig-name descname"><span class="pre">NODE_EXPORTER</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/prometheus/node-exporter:v1.7.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.NODE_EXPORTER" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.NVMEOF">
<span class="sig-name descname"><span class="pre">NVMEOF</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/ceph/nvmeof:1.4</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.NVMEOF" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.OAUTH2_PROXY">
<span class="sig-name descname"><span class="pre">OAUTH2_PROXY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/oauth2-proxy/oauth2-proxy:v7.6.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.OAUTH2_PROXY" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.PROMETHEUS">
<span class="sig-name descname"><span class="pre">PROMETHEUS</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/prometheus/prometheus:v2.51.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.PROMETHEUS" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.PROMTAIL">
<span class="sig-name descname"><span class="pre">PROMTAIL</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">docker.io/grafana/promtail:3.0.0</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.PROMTAIL" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.SAMBA">
<span class="sig-name descname"><span class="pre">SAMBA</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/samba.org/samba-server:devbuilds-centos-amd64</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.SAMBA" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.SAMBA_METRICS">
<span class="sig-name descname"><span class="pre">SAMBA_METRICS</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">quay.io/samba.org/samba-metrics:latest</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.SAMBA_METRICS" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

<dl class="py attribute">
<dt class="sig sig-object py" id="ceph.cephadm.images.DefaultImages.SNMP_GATEWAY">
<span class="sig-name descname"><span class="pre">SNMP_GATEWAY</span></span><em class="property"><span class="w"> </span><span class="p"><span class="pre">=</span></span><span class="w"> </span><span class="pre">docker.io/maxwo/snmp-notifier:v1.2.1</span></em><a class="headerlink" href="#ceph.cephadm.images.DefaultImages.SNMP_GATEWAY" title="Permalink to this definition"></a></dt>
<dd></dd></dl>

</dd></dl>

</section>
<section id="using-custom-images">
<h3>Using custom images<a class="headerlink" href="#using-custom-images" title="Permalink to this heading"></a></h3>
<p>It is possible to install or upgrade monitoring components based on other
images. The ID of the image that you plan to use must be stored in the
configuration. The following configuration options are available:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_prometheus</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_grafana</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_alertmanager</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_node_exporter</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_loki</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_promtail</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_haproxy</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_keepalived</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_snmp_gateway</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_elasticsearch</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_jaeger_agent</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_jaeger_collector</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">container_image_jaeger_query</span></code></p></li>
</ul>
<p>Custom images can be set with the <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">config</span></code> command. To set custom images, run a command of the following form:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr<span class="w"> </span>mgr/cephadm/&lt;option_name&gt;<span class="w"> </span>&lt;value&gt;</span>
</pre></div></div><p>For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr<span class="w"> </span>mgr/cephadm/container_image_prometheus<span class="w"> </span>prom/prometheus:v1.4.1</span>
</pre></div></div><p>If you were already running monitoring stack daemon(s) of the same image type
that you changed, then you must redeploy the daemon(s) in order to make them
use the new image.</p>
<p>For example, if you changed the Prometheus image, you would have to run the
following command in order to pick up the changes:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>redeploy<span class="w"> </span>prometheus</span>
</pre></div></div><div class="admonition note">
<p class="admonition-title">Note</p>
<p>By setting a custom image, the default value will be overridden (but not
overwritten). The default value will change when an update becomes
available. If you set a custom image, you will not be able to automatically
update the component you have modified with the custom image. You will
need to manually update the configuration (that includes the image name
and the tag) to be able to install updates.</p>
<p>If you choose to accept the recommendations, you can reset the custom
image that you have set before. If you do this, the default value will be
used again.  Use <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">config</span> <span class="pre">rm</span></code> to reset the configuration option, in
a command of the following form:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config<span class="w"> </span>rm<span class="w"> </span>mgr<span class="w"> </span>mgr/cephadm/&lt;option_name&gt;</span>
</pre></div></div><p>For example:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config<span class="w"> </span>rm<span class="w"> </span>mgr<span class="w"> </span>mgr/cephadm/container_image_prometheus</span>
</pre></div></div></div>
<p>See also <a class="reference internal" href="../../install/#cephadm-airgap"><span class="std std-ref">Deployment in an isolated environment</span></a>.</p>
</section>
<section id="using-custom-configuration-files">
<span id="cephadm-overwrite-jinja2-templates"></span><h3>Using custom configuration files<a class="headerlink" href="#using-custom-configuration-files" title="Permalink to this heading"></a></h3>
<p>By overriding cephadm templates, it is possible to completely customize the
configuration files for monitoring services.</p>
<p>Internally, cephadm already uses <a class="reference external" href="https://jinja.palletsprojects.com/en/2.11.x/">Jinja2</a> templates to generate the
configuration files for all monitoring components. Starting from version 17.2.3,
cephadm supports Prometheus http service discovery, and uses this endpoint for the
definition and management of the embedded Prometheus service. The endpoint listens on
<code class="docutils literal notranslate"><span class="pre">https://&lt;mgr-ip&gt;:8765/sd/</span></code> (the port is
configurable through the variable <code class="docutils literal notranslate"><span class="pre">service_discovery_port</span></code>) and returns scrape target
information in <a class="reference external" href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config">http_sd_config format</a>.</p>
<p>Customers with an external monitoring stack can use the <cite>ceph-mgr</cite> service discovery endpoint
to get the scraping configuration. The root certificate of the server can be obtained with the
following command:</p>
<blockquote>
<div><div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>sd<span class="w"> </span>dump<span class="w"> </span>cert</span>
</pre></div></div></div></blockquote>
<p>The configuration of Prometheus, Grafana, or Alertmanager may be customized by storing
a Jinja2 template for each service. This template will be evaluated every time a service
of that kind is deployed or reconfigured. That way, the custom configuration is preserved
and automatically applied on future deployments of these services.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>The configuration of the custom template is also preserved when the default
configuration of cephadm changes. If the updated configuration is to be used,
the custom template needs to be migrated <em>manually</em> after each upgrade of Ceph.</p>
</div>
<section id="option-names">
<h4>Option names<a class="headerlink" href="#option-names" title="Permalink to this heading"></a></h4>
<p>The following templates for files that will be generated by cephadm can be
overridden. These are the names to be used when storing with <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">config-key</span> <span class="pre">set</span></code>:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">services/alertmanager/alertmanager.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/alertmanager/web.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/grafana/ceph-dashboard.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/grafana/grafana.ini</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/ingress/haproxy.cfg</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/ingress/keepalived.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/iscsi/iscsi-gateway.cfg</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/external_server.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/internal_server.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/nginx.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/nfs/ganesha.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/node-exporter/web.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/nvmeof/ceph-nvmeof.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/oauth2-proxy/oauth2-proxy.conf</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/prometheus/prometheus.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/prometheus/web.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/loki.yml</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/promtail.yml</span></code></p></li>
</ul>
<p>You can look up the file templates that are currently used by cephadm in
<code class="docutils literal notranslate"><span class="pre">src/pybind/mgr/cephadm/templates</span></code>:</p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">services/alertmanager/alertmanager.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/alertmanager/web.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/grafana/ceph-dashboard.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/grafana/grafana.ini.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/ingress/haproxy.cfg.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/ingress/keepalived.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/iscsi/iscsi-gateway.cfg.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/external_server.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/internal_server.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/mgmt-gateway/nginx.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/nfs/ganesha.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/node-exporter/web.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/nvmeof/ceph-nvmeof.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/oauth2-proxy/oauth2-proxy.conf.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/prometheus/prometheus.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/prometheus/web.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/loki.yml.j2</span></code></p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">services/promtail.yml.j2</span></code></p></li>
</ul>
</section>
<section id="usage">
<h4>Usage<a class="headerlink" href="#usage" title="Permalink to this heading"></a></h4>
<p>The following command applies a single-line value:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/&lt;option_name&gt;<span class="w"> </span>&lt;value&gt;
</pre></div>
</div>
<p>To set the contents of a file as a template, use the <code class="docutils literal notranslate"><span class="pre">-i</span></code> argument:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/&lt;option_name&gt;<span class="w"> </span>-i<span class="w"> </span><span class="nv">$PWD</span>/&lt;filename&gt;
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>When using files as input to <code class="docutils literal notranslate"><span class="pre">config-key</span></code>, an absolute path to the file must
be used.</p>
</div>
<p>Then the configuration file for the service needs to be recreated.
This is done using <cite>reconfig</cite>. For more details see the following example.</p>
</section>
<section id="example">
<h4>Example<a class="headerlink" href="#example" title="Permalink to this heading"></a></h4>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># set the contents of ./prometheus.yml.j2 as template</span>
ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/services/prometheus/prometheus.yml<span class="w"> </span><span class="se">\</span>
<span class="w">  </span>-i<span class="w"> </span><span class="nv">$PWD</span>/prometheus.yml.j2

<span class="c1"># reconfig the prometheus service</span>
ceph<span class="w"> </span>orch<span class="w"> </span>reconfig<span class="w"> </span>prometheus
</pre></div>
</div>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span><span class="c1"># set additional custom alerting rules for Prometheus</span>
ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/services/prometheus/alerting/custom_alerts.yml<span class="w"> </span><span class="se">\</span>
<span class="w">  </span>-i<span class="w"> </span><span class="nv">$PWD</span>/custom_alerts.yml

<span class="c1"># Note that custom alerting rules are not parsed by Jinja and hence escaping</span>
<span class="c1"># will not be an issue.</span>
</pre></div>
</div>
</section>
</section>
</section>
<section id="deploying-monitoring-without-cephadm">
<h2>Deploying monitoring without cephadm<a class="headerlink" href="#deploying-monitoring-without-cephadm" title="Permalink to this heading"></a></h2>
<p>If you have an existing prometheus monitoring infrastructure, or would like
to manage it yourself, you need to configure it to integrate with your Ceph
cluster.</p>
<ul>
<li><p>Enable the prometheus module in the ceph-mgr daemon</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ceph<span class="w"> </span>mgr<span class="w"> </span>module<span class="w"> </span><span class="nb">enable</span><span class="w"> </span>prometheus
</pre></div>
</div>
<p>By default, ceph-mgr presents prometheus metrics on port 9283 on each host
running a ceph-mgr daemon.  Configure prometheus to scrape these.</p>
</li>
</ul>
<p>To make this integration easier, cephadm provides a service discovery endpoint at
<code class="docutils literal notranslate"><span class="pre">https://&lt;mgr-ip&gt;:8765/sd/</span></code>. This endpoint can be used by an external
Prometheus server to retrieve target information for a specific service. Information returned
by this endpoint uses the format specified by the Prometheus <a class="reference external" href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#http_sd_config">http_sd_config option</a>.</p>
<p>Here’s an example Prometheus job definition that uses the cephadm service discovery endpoint:</p>
<blockquote>
<div><div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>-<span class="w"> </span>job_name:<span class="w"> </span><span class="s1">&#39;ceph-exporter&#39;</span>
<span class="w">  </span>http_sd_configs:
<span class="w">  </span>-<span class="w"> </span>url:<span class="w"> </span>https://&lt;mgr-ip&gt;:8765/sd/prometheus/sd-config?service<span class="o">=</span>ceph-exporter
</pre></div>
</div>
</div></blockquote>
<ul class="simple">
<li><p>To enable the dashboard’s prometheus-based alerting, see <a class="reference internal" href="../../../mgr/dashboard/#dashboard-alerting"><span class="std std-ref">启用 Prometheus 报警</span></a>.</p></li>
<li><p>To enable dashboard integration with Grafana, see <a class="reference internal" href="../../../mgr/dashboard/#dashboard-grafana"><span class="std std-ref">允许嵌入 Grafana 仪表盘</span></a>.</p></li>
</ul>
</section>
<section id="disabling-monitoring">
<h2>Disabling monitoring<a class="headerlink" href="#disabling-monitoring" title="Permalink to this heading"></a></h2>
<p>To disable monitoring and remove the software that supports it, run the following commands:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>ceph<span class="w"> </span>orch<span class="w"> </span>rm<span class="w"> </span>grafana
<span class="gp">$ </span>ceph<span class="w"> </span>orch<span class="w"> </span>rm<span class="w"> </span>prometheus<span class="w"> </span>--force<span class="w">   </span><span class="c1"># this will delete metrics data collected so far</span>
<span class="gp">$ </span>ceph<span class="w"> </span>orch<span class="w"> </span>rm<span class="w"> </span>node-exporter
<span class="gp">$ </span>ceph<span class="w"> </span>orch<span class="w"> </span>rm<span class="w"> </span>alertmanager
<span class="gp">$ </span>ceph<span class="w"> </span>mgr<span class="w"> </span>module<span class="w"> </span>disable<span class="w"> </span>prometheus
</pre></div>
</div>
<p>See also <a class="reference internal" href="../#orch-rm"><span class="std std-ref">Removing a Service</span></a>.</p>
</section>
<section id="setting-up-rbd-image-monitoring">
<h2>Setting up RBD-Image monitoring<a class="headerlink" href="#setting-up-rbd-image-monitoring" title="Permalink to this heading"></a></h2>
<p>For performance reasons, monitoring of RBD images is disabled by default. For more information, see
<a class="reference internal" href="../../../mgr/prometheus/#prometheus-rbd-io-statistics"><span class="std std-ref">Ceph 健康检查</span></a>. If disabled, the overview and details dashboards will stay empty in Grafana
and the metrics will not be visible in Prometheus.</p>
</section>
<section id="setting-up-prometheus">
<h2>Setting up Prometheus<a class="headerlink" href="#setting-up-prometheus" title="Permalink to this heading"></a></h2>
<section id="setting-prometheus-retention-size-and-time">
<h3>Setting Prometheus Retention Size and Time<a class="headerlink" href="#setting-prometheus-retention-size-and-time" title="Permalink to this heading"></a></h3>
<p>Cephadm can configure Prometheus TSDB retention by specifying <code class="docutils literal notranslate"><span class="pre">retention_time</span></code>
and <code class="docutils literal notranslate"><span class="pre">retention_size</span></code> values in the Prometheus service spec.
The retention time value defaults to 15 days (15d). Users can set a different value/unit where
supported units are: ‘y’, ‘w’, ‘d’, ‘h’, ‘m’ and ‘s’. The retention size value defaults
to 0 (disabled). Supported units in this case are: ‘B’, ‘KB’, ‘MB’, ‘GB’, ‘TB’, ‘PB’ and ‘EB’.</p>
<p>In the following example spec we set the retention time to 1 year and the size to 1GB.</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">prometheus</span>
<span class="nt">placement</span><span class="p">:</span>
<span class="w">  </span><span class="nt">count</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">1</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">retention_time</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;1y&quot;</span>
<span class="w">  </span><span class="nt">retention_size</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;1GB&quot;</span>
</pre></div>
</div>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>If you already had Prometheus daemon(s) deployed before and are updating an
existing spec as opposed to doing a fresh Prometheus deployment, you must also
tell cephadm to redeploy the Prometheus daemon(s) to put this change into effect.
This can be done with a <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">orch</span> <span class="pre">redeploy</span> <span class="pre">prometheus</span></code> command.</p>
</div>
</section>
</section>
<section id="setting-up-grafana">
<h2>Setting up Grafana<a class="headerlink" href="#setting-up-grafana" title="Permalink to this heading"></a></h2>
<section id="manually-setting-the-grafana-url">
<h3>Manually setting the Grafana URL<a class="headerlink" href="#manually-setting-the-grafana-url" title="Permalink to this heading"></a></h3>
<p>Cephadm automatically configures Prometheus, Grafana, and Alertmanager in
all cases except one.</p>
<p>In some setups, the Dashboard user’s browser might not be able to access the
Grafana URL that is configured in Ceph Dashboard. This can happen when the
cluster and the accessing user are in different DNS zones.</p>
<p>If this is the case, you can use a configuration option for Ceph Dashboard
to set the URL that the user’s browser will use to access Grafana. This
value will never be altered by cephadm. To set this configuration option,
issue the following command:</p>
<blockquote>
<div><div class="highlight-default notranslate"><div class="highlight"><pre><style type="text/css">
span.prompt2:before {
  content: "$ ";
}
</style><span class="prompt2">ceph<span class="w"> </span>dashboard<span class="w"> </span>set-grafana-frontend-api-url<span class="w"> </span>&lt;grafana-server-api&gt;</span>
</pre></div></div></div></blockquote>
<p>It might take a minute or two for services to be deployed. After the
services have been deployed, you should see something like this when you issue the command <code class="docutils literal notranslate"><span class="pre">ceph</span> <span class="pre">orch</span> <span class="pre">ls</span></code>:</p>
<div class="highlight-console notranslate"><div class="highlight"><pre><span></span><span class="gp">$ </span>ceph<span class="w"> </span>orch<span class="w"> </span>ls
<span class="go">NAME           RUNNING  REFRESHED  IMAGE NAME                                      IMAGE ID        SPEC</span>
<span class="go">alertmanager       1/1  6s ago     docker.io/prom/alertmanager:latest              0881eb8f169f  present</span>
<span class="go">crash              2/2  6s ago     docker.io/ceph/daemon-base:latest-master-devel  mix           present</span>
<span class="go">grafana            1/1  0s ago     docker.io/pcuzner/ceph-grafana-el8:latest       f77afcf0bcf6   absent</span>
<span class="go">node-exporter      2/2  6s ago     docker.io/prom/node-exporter:latest             e5a616e4b9cf  present</span>
<span class="go">prometheus         1/1  6s ago     docker.io/prom/prometheus:latest                e935122ab143  present</span>
</pre></div>
</div>
</section>
<section id="configuring-ssl-tls-for-grafana">
<h3>Configuring SSL/TLS for Grafana<a class="headerlink" href="#configuring-ssl-tls-for-grafana" title="Permalink to this heading"></a></h3>
<p><code class="docutils literal notranslate"><span class="pre">cephadm</span></code> deploys Grafana using the certificate defined in the ceph
key/value store. If no certificate is specified, <code class="docutils literal notranslate"><span class="pre">cephadm</span></code> generates a
self-signed certificate during the deployment of the Grafana service. Each
certificate is specific to the host it was generated on.</p>
<p>A custom certificate can be configured using the following commands:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/<span class="o">{</span>hostname<span class="o">}</span>/grafana_key<span class="w"> </span>-i<span class="w"> </span><span class="nv">$PWD</span>/key.pem</span>
<span class="prompt1">ceph<span class="w"> </span>config-key<span class="w"> </span><span class="nb">set</span><span class="w"> </span>mgr/cephadm/<span class="o">{</span>hostname<span class="o">}</span>/grafana_crt<span class="w"> </span>-i<span class="w"> </span><span class="nv">$PWD</span>/certificate.pem</span>
</pre></div></div><p>Where <cite>hostname</cite> is the hostname of the host where the Grafana service is deployed.</p>
<p>If you have already deployed Grafana, run <code class="docutils literal notranslate"><span class="pre">reconfig</span></code> on the service to
update its configuration:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>reconfig<span class="w"> </span>grafana</span>
</pre></div></div><p>The <code class="docutils literal notranslate"><span class="pre">reconfig</span></code> command also sets the proper URL for Ceph Dashboard.</p>
</section>
<section id="setting-the-initial-admin-password">
<h3>Setting the initial admin password<a class="headerlink" href="#setting-the-initial-admin-password" title="Permalink to this heading"></a></h3>
<p>By default, Grafana will not create an initial
admin user. In order to create the admin user, please create a file
<code class="docutils literal notranslate"><span class="pre">grafana.yaml</span></code> with this content:</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">grafana</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">initial_admin_password</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">mypassword</span>
</pre></div>
</div>
<p>Then apply this specification:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>ceph<span class="w"> </span>orch<span class="w"> </span>apply<span class="w"> </span>-i<span class="w"> </span>grafana.yaml
ceph<span class="w"> </span>orch<span class="w"> </span>redeploy<span class="w"> </span>grafana
</pre></div>
</div>
<p>Grafana will now create an admin user called <code class="docutils literal notranslate"><span class="pre">admin</span></code> with the
given password.</p>
</section>
<section id="turning-off-anonymous-access">
<h3>Turning off anonymous access<a class="headerlink" href="#turning-off-anonymous-access" title="Permalink to this heading"></a></h3>
<p>By default, cephadm allows anonymous users (users who have not provided any
login information) limited, viewer-only access to the Grafana dashboard. In
order to set up Grafana to allow viewing only by logged-in users, you can
set <code class="docutils literal notranslate"><span class="pre">anonymous_access:</span> <span class="pre">False</span></code> in your grafana spec.</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">grafana</span>
<span class="nt">placement</span><span class="p">:</span>
<span class="w">  </span><span class="nt">hosts</span><span class="p">:</span>
<span class="w">  </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">host1</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">anonymous_access</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">False</span>
<span class="w">  </span><span class="nt">initial_admin_password</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;mypassword&quot;</span>
</pre></div>
</div>
<p>Deploying Grafana with anonymous access disabled and no initial admin
password would make the dashboard inaccessible, so cephadm requires
setting the <code class="docutils literal notranslate"><span class="pre">initial_admin_password</span></code> when <code class="docutils literal notranslate"><span class="pre">anonymous_access</span></code> is set to false.</p>
</section>
</section>
<section id="setting-up-alertmanager">
<h2>Setting up Alertmanager<a class="headerlink" href="#setting-up-alertmanager" title="Permalink to this heading"></a></h2>
<section id="adding-alertmanager-webhooks">
<h3>Adding Alertmanager webhooks<a class="headerlink" href="#adding-alertmanager-webhooks" title="Permalink to this heading"></a></h3>
<p>To add new webhooks to the Alertmanager configuration, add additional
webhook URLs as follows:</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">alertmanager</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">user_data</span><span class="p">:</span>
<span class="w">    </span><span class="nt">default_webhook_urls</span><span class="p">:</span>
<span class="w">    </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;https://foo&quot;</span>
<span class="w">    </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;https://bar&quot;</span>
</pre></div>
</div>
<p>Where <code class="docutils literal notranslate"><span class="pre">default_webhook_urls</span></code> is a list of additional URLs that are
added to the default receivers’ <code class="docutils literal notranslate"><span class="pre">&lt;webhook_configs&gt;</span></code> configuration.</p>
<p>Run <code class="docutils literal notranslate"><span class="pre">reconfig</span></code> on the service to update its configuration:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>reconfig<span class="w"> </span>alertmanager</span>
</pre></div></div></section>
<section id="turn-on-certificate-validation">
<h3>Turn on Certificate Validation<a class="headerlink" href="#turn-on-certificate-validation" title="Permalink to this heading"></a></h3>
<p>If you are using certificates for Alertmanager and want to make sure
these certificates are verified, set the “secure” option to
true in your Alertmanager spec (it defaults to false):</p>
<div class="highlight-yaml notranslate"><div class="highlight"><pre><span></span><span class="nt">service_type</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">alertmanager</span>
<span class="nt">spec</span><span class="p">:</span>
<span class="w">  </span><span class="nt">secure</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span>
</pre></div>
</div>
<p>If you already had Alertmanager daemons running before applying the spec,
you must reconfigure them to update their configuration:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span class="prompt1">ceph<span class="w"> </span>orch<span class="w"> </span>reconfig<span class="w"> </span>alertmanager</span>
</pre></div></div></section>
</section>
<section id="further-reading">
<h2>Further Reading<a class="headerlink" href="#further-reading" title="Permalink to this heading"></a></h2>
<ul class="simple">
<li><p><a class="reference internal" href="../../../mgr/prometheus/#mgr-prometheus"><span class="std std-ref">Prometheus Module</span></a></p></li>
</ul>
</section>
</section>



<div id="support-the-ceph-foundation" class="admonition note">
  <p class="first admonition-title">Brought to you by the Ceph Foundation</p>
  <p class="last">The Ceph Documentation is a community resource funded and hosted by the non-profit <a href="https://ceph.io/en/foundation/">Ceph Foundation</a>. If you would like to support this and our other efforts, please consider <a href="https://ceph.io/en/foundation/join/">joining now</a>.</p>
</div>


           </div>
           
          </div>
          <footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
        <a href="../custom-container/" class="btn btn-neutral float-left" title="Custom Container Service" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
        <a href="../snmp-gateway/" class="btn btn-neutral float-right" title="SNMP Gateway Service" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
    </div>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2016, Ceph authors and contributors. Licensed under Creative Commons Attribution Share Alike 3.0 (CC-BY-SA-3.0).</p>
  </div>

   

</footer>
        </div>
      </div>

    </section>

  </div>
  

  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>

  
  
    
   

</body>
</html>